{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "import openai\n",
    "import groq\n",
    "import os\n",
    "import json\n",
    "from groq import Groq\n",
    "from llama_index.core import SimpleDirectoryReader\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "os.environ[\"GROQ_API_KEY\"] = \"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\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Summarize Files\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "reader = SimpleDirectoryReader(input_dir=\".\")\n",
    "documents = reader.load_data()\n",
    "doc_dicts = [{\"content\": d.text, **d.metadata} for d in documents]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "doc_dicts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "PROMPT = f\"\"\"\n",
    "The following is a list of file contents, along with their metadata. For each file, provide a summary of the contents.\n",
    "\n",
    "{doc_dicts}\n",
    "\n",
    "Return a JSON list with the following schema:\n",
    "\n",
    "```json\n",
    "{{\n",
    "  \"files\": [\n",
    "    {{\n",
    "      \"filename\": \"name of the file\",\n",
    "      \"summary\": \"summary of the content\"\n",
    "    }}\n",
    "  ]\n",
    "}}\n",
    "```\n",
    "\"\"\".strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "client = Groq(\n",
    "    api_key=os.environ.get(\"GROQ_API_KEY\"),\n",
    ")\n",
    "\n",
    "chat_completion = client.chat.completions.create(\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"system\",\n",
    "            \"content\": \"Always return JSON. Do not include any other text or formatting characters.\",\n",
    "        },\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": PROMPT,\n",
    "        },\n",
    "    ],\n",
    "    model=\"llama3-70b-8192\",\n",
    "    response_format={\"type\": \"json_object\"},\n",
    ")\n",
    "\n",
    "summaries = json.loads(chat_completion.choices[0].message.content)[\"files\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "summaries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create File Tree\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "PROMPT = f\"\"\"\n",
    "The following is a list of files and a summary of their contents. Read them carefully, then propose a directory structure that optimally organizes the files using known conventions and best practices.\n",
    "\n",
    "{summaries}\n",
    "\n",
    "You will solve this task by adding a `path` key to the JSON object below. The value of the `path` key should be the path to the file that you think is the most relevant to the summary.\n",
    "\"\"\".strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "client = Groq(\n",
    "    api_key=os.environ.get(\"GROQ_API_KEY\"),\n",
    ")\n",
    "\n",
    "chat_completion = client.chat.completions.create(\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"system\",\n",
    "            \"content\": \"Always return JSON. Do not include any other text or formatting characters.\",\n",
    "        },\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": PROMPT,\n",
    "        },\n",
    "    ],\n",
    "    model=\"llama3-70b-8192\",\n",
    "    # response_format={\"type\": \"json_object\"},\n",
    ")\n",
    "\n",
    "file_tree = json.loads(chat_completion.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "import pathlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'pathlib' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Input \u001b[0;32mIn [57]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m BASE_DIR \u001b[38;5;241m=\u001b[39m \u001b[43mpathlib\u001b[49m\u001b[38;5;241m.\u001b[39mPath(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m      2\u001b[0m BASE_DIR\u001b[38;5;241m.\u001b[39mmkdir(exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m file_tree:\n",
      "\u001b[0;31mNameError\u001b[0m: name 'pathlib' is not defined"
     ]
    }
   ],
   "source": [
    "BASE_DIR = pathlib.Path(\"test_dir\")\n",
    "BASE_DIR.mkdir(exist_ok=True)\n",
    "\n",
    "for file in file_tree:\n",
    "    file[\"path\"] = pathlib.Path(file[\"path\"])\n",
    "    # Create file in specified base directory\n",
    "    (BASE_DIR / file[\"path\"]).parent.mkdir(parents=True, exist_ok=True)\n",
    "    with open(BASE_DIR / file[\"path\"], \"w\") as f:\n",
    "        f.write(\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "json.loads('{\\n        \"file_path\": \"/Users/iyaja/Git/llama-fs/LICENSE\",\\n        \"summary\": \"MIT License\"\\n    },\\n    {\\n        \"file_path\": \"/Users/iyaja/Git/llama-fs/main.py\",\\n        \"summary\": \"Python script\"\\n    },\\n    {\\n        \"file_path\": \"/Users/iyaja/Git/llama-fs/requirements.txt\",\\n        \"summary\": \"List of dependencies\"\\n    },\\n    {\\n        \"file_path\": \"/Users/iyaja/Git/llama-fs/scratch.ipynb\",\\n        \"summary\": \"Jupyter notebook\"\\n    }')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test Server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "requests.post(\n",
    "    \"http://127.0.0.1:8000/batch\",\n",
    "    json={\"path\": \"/Users/iyaja/Git/llama-fs/sample_data\"},\n",
    ").json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "ename": "ChunkedEncodingError",
     "evalue": "(\"Connection broken: InvalidChunkLength(got length b'', 0 bytes read)\", InvalidChunkLength(got length b'', 0 bytes read))",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/urllib3/response.py:761\u001b[0m, in \u001b[0;36mHTTPResponse._update_chunk_length\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    760\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 761\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchunk_left \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mline\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    762\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m:\n\u001b[1;32m    763\u001b[0m     \u001b[38;5;66;03m# Invalid chunked protocol response, abort.\u001b[39;00m\n",
      "\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 16: b''",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mInvalidChunkLength\u001b[0m                        Traceback (most recent call last)",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/urllib3/response.py:444\u001b[0m, in \u001b[0;36mHTTPResponse._error_catcher\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    443\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 444\u001b[0m     \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[1;32m    446\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SocketTimeout:\n\u001b[1;32m    447\u001b[0m     \u001b[38;5;66;03m# FIXME: Ideally we'd like to include the url in the ReadTimeoutError but\u001b[39;00m\n\u001b[1;32m    448\u001b[0m     \u001b[38;5;66;03m# there is yet no clean way to get at it from this context.\u001b[39;00m\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/urllib3/response.py:828\u001b[0m, in \u001b[0;36mHTTPResponse.read_chunked\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m    827\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 828\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_update_chunk_length\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    829\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchunk_left \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/urllib3/response.py:765\u001b[0m, in \u001b[0;36mHTTPResponse._update_chunk_length\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    764\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m--> 765\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidChunkLength(\u001b[38;5;28mself\u001b[39m, line)\n",
      "\u001b[0;31mInvalidChunkLength\u001b[0m: InvalidChunkLength(got length b'', 0 bytes read)",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mProtocolError\u001b[0m                             Traceback (most recent call last)",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/requests/models.py:816\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m    815\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 816\u001b[0m     \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    817\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/urllib3/response.py:624\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m    623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchunked \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msupports_chunked_reads():\n\u001b[0;32m--> 624\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m line \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mread_chunked(amt, decode_content\u001b[38;5;241m=\u001b[39mdecode_content):\n\u001b[1;32m    625\u001b[0m         \u001b[38;5;28;01myield\u001b[39;00m line\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/urllib3/response.py:857\u001b[0m, in \u001b[0;36mHTTPResponse.read_chunked\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m    856\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_original_response:\n\u001b[0;32m--> 857\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_original_response\u001b[38;5;241m.\u001b[39mclose()\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/contextlib.py:137\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[0;34m(self, typ, value, traceback)\u001b[0m\n\u001b[1;32m    136\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 137\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgen\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mthrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtyp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtraceback\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    138\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m    139\u001b[0m     \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[1;32m    140\u001b[0m     \u001b[38;5;66;03m# was passed to throw().  This prevents a StopIteration\u001b[39;00m\n\u001b[1;32m    141\u001b[0m     \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/urllib3/response.py:461\u001b[0m, in \u001b[0;36mHTTPResponse._error_catcher\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    459\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (HTTPException, SocketError) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    460\u001b[0m     \u001b[38;5;66;03m# This includes IncompleteRead.\u001b[39;00m\n\u001b[0;32m--> 461\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection broken: \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m e, e)\n\u001b[1;32m    463\u001b[0m \u001b[38;5;66;03m# If no exception is thrown, we should avoid cleaning up\u001b[39;00m\n\u001b[1;32m    464\u001b[0m \u001b[38;5;66;03m# unnecessarily.\u001b[39;00m\n",
      "\u001b[0;31mProtocolError\u001b[0m: (\"Connection broken: InvalidChunkLength(got length b'', 0 bytes read)\", InvalidChunkLength(got length b'', 0 bytes read))",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mChunkedEncodingError\u001b[0m                      Traceback (most recent call last)",
      "Input \u001b[0;32mIn [88]\u001b[0m, in \u001b[0;36m<cell line: 7>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mpost(url, json\u001b[38;5;241m=\u001b[39mdata, stream\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m r:\n\u001b[1;32m      8\u001b[0m     r\u001b[38;5;241m.\u001b[39mraise_for_status()  \u001b[38;5;66;03m# This will raise an exception for HTTP errors\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m line \u001b[38;5;129;01min\u001b[39;00m r\u001b[38;5;241m.\u001b[39miter_lines(decode_unicode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[1;32m     10\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m line:\n\u001b[1;32m     11\u001b[0m             \u001b[38;5;66;03m# Load the JSON object from the line\u001b[39;00m\n\u001b[1;32m     12\u001b[0m             \u001b[38;5;28mprint\u001b[39m(json\u001b[38;5;241m.\u001b[39mloads(line))\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/requests/models.py:865\u001b[0m, in \u001b[0;36mResponse.iter_lines\u001b[0;34m(self, chunk_size, decode_unicode, delimiter)\u001b[0m\n\u001b[1;32m    856\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Iterates over the response data, one line at a time.  When\u001b[39;00m\n\u001b[1;32m    857\u001b[0m \u001b[38;5;124;03mstream=True is set on the request, this avoids reading the\u001b[39;00m\n\u001b[1;32m    858\u001b[0m \u001b[38;5;124;03mcontent at once into memory for large responses.\u001b[39;00m\n\u001b[1;32m    859\u001b[0m \n\u001b[1;32m    860\u001b[0m \u001b[38;5;124;03m.. note:: This method is not reentrant safe.\u001b[39;00m\n\u001b[1;32m    861\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    863\u001b[0m pending \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 865\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miter_content(\n\u001b[1;32m    866\u001b[0m     chunk_size\u001b[38;5;241m=\u001b[39mchunk_size, decode_unicode\u001b[38;5;241m=\u001b[39mdecode_unicode\n\u001b[1;32m    867\u001b[0m ):\n\u001b[1;32m    869\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m pending \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    870\u001b[0m         chunk \u001b[38;5;241m=\u001b[39m pending \u001b[38;5;241m+\u001b[39m chunk\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/requests/utils.py:567\u001b[0m, in \u001b[0;36mstream_decode_response_unicode\u001b[0;34m(iterator, r)\u001b[0m\n\u001b[1;32m    564\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Stream decodes an iterator.\"\"\"\u001b[39;00m\n\u001b[1;32m    566\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m r\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 567\u001b[0m     \u001b[38;5;28;01myield from\u001b[39;00m iterator\n\u001b[1;32m    568\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m    570\u001b[0m decoder \u001b[38;5;241m=\u001b[39m codecs\u001b[38;5;241m.\u001b[39mgetincrementaldecoder(r\u001b[38;5;241m.\u001b[39mencoding)(errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreplace\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "File \u001b[0;32m~/mambaforge/lib/python3.9/site-packages/requests/models.py:818\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m    816\u001b[0m     \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    817\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 818\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n\u001b[1;32m    819\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m DecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    820\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m ContentDecodingError(e)\n",
      "\u001b[0;31mChunkedEncodingError\u001b[0m: (\"Connection broken: InvalidChunkLength(got length b'', 0 bytes read)\", InvalidChunkLength(got length b'', 0 bytes read))"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "url = \"http://127.0.0.1:8000/watch\"\n",
    "data = {\"path\": \"/Users/iyaja/Git/llama-fs/sample_data\"}\n",
    "\n",
    "with requests.post(url, json=data, stream=True) as r:\n",
    "    r.raise_for_status()  # This will raise an exception for HTTP errors\n",
    "    for line in r.iter_lines(decode_unicode=True):\n",
    "        if line:\n",
    "            # Load the JSON object from the line\n",
    "            print(json.loads(line))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fix the mess that is Llama Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "from llama_index.core import SimpleDirectoryReader\n",
    "from llama_index.core import Document\n",
    "from llama_index.core.node_parser import TokenTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "splitter = TokenTextSplitter(chunk_size=6144)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "reader = SimpleDirectoryReader(input_dir=\".\", recursive=True)\n",
    "all_docs = []\n",
    "for docs in reader.iter_data():\n",
    "    # <do something with the documents per file>\n",
    "    if len(docs) > 1:\n",
    "        text = splitter.split_text(\"\\n\".join([d.text for d in docs]))[0]\n",
    "        docs = [Document(text=text, metadata=docs[0].metadata)]\n",
    "    all_docs.extend(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
